# Uses regular expressions (re), requests, and json to scrape Dow Jones index data.
# -*- coding: utf-8 -*-
import json
import re
import requests
def retrieve_dji_list():
    """Scrape the Dow 30 component list from the CNN Money page.

    Returns:
        A list of dicts, one per match of the scraping pattern, each with
        the keys ``'code'``, ``'name'`` and ``'price'`` (price as a float).
        The list is empty when the HTTP request fails or when the page
        contains no matching markup.
    """
    url = 'http://money.cnn.com/data/dow30/'
    try:
        # A timeout keeps the call from hanging forever on a stalled server.
        res = requests.get(url, timeout=30)
    except (requests.exceptions.RequestException, ConnectionError) as e:
        # requests raises its own exception hierarchy, which is not derived
        # from the builtin ConnectionError, so both are caught here.
        print(e)
        # Without a response there is nothing to parse.
        return []

    # Raw string: identical regex, but without the invalid "\/" string escape.
    pattern = re.compile(
        r'class="wsod_symbol">(.*?)<\/a>.*?<span.*?>(.*?)<\/span>.*?\n.*?class="wsod_stream">(.*?)<\/span>')

    return [
        {
            'code': code,
            'name': name,
            # Strip thousands separators so values such as "1,234.56" parse.
            'price': float(price.replace(',', '')),
        }
        for code, name, price in pattern.findall(res.text)
    ]
def retrieve_quotes_historical(stock_code, start='', end=''):
    """Scrape the historical quotes embedded in a Yahoo Finance history page.

    Args:
        stock_code: Ticker symbol interpolated into the history page URL.
        start: Currently unused; accepted for interface compatibility.
        end: Currently unused; accepted for interface compatibility.

    Returns:
        The entries of the page's embedded ``HistoricalPriceStore`` price
        array, in reverse of the order in which they appear on the page,
        excluding entries that carry a ``'type'`` key. The list is empty
        when the HTTP request fails or the page has no such array.

    Raises:
        json.JSONDecodeError: If the extracted price array is not valid JSON.
    """
    url = 'https://finance.yahoo.com/quote/%s/history?p=%s' % (stock_code, stock_code)
    try:
        # A timeout keeps the call from hanging forever on a stalled server.
        r = requests.get(url, timeout=30)
    except (requests.exceptions.RequestException, ConnectionError) as err:
        # requests raises its own exception hierarchy, which is not derived
        # from the builtin ConnectionError, so both are caught here.
        print(err)
        # Without a response there is nothing to parse.
        return []

    m = re.findall('"HistoricalPriceStore":{"prices":(.*?),"isPending"', r.text)
    if not m:
        return []

    # Only the first embedded price array is used; its order is reversed.
    quotes = json.loads(m[0])[::-1]
    return [item for item in quotes if 'type' not in item]
```